/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <regex.h>
#include <signal.h>

#if (WIN32|WINNT)
#include <time.h>
#endif

#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#ifdef HAVE_LIBUTIL_H
#include <libutil.h>
#endif

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#include "udm_common.h"
#include "udm_utils.h"
#include "udm_log.h"
#include "udm_conf.h"
#include "udm_indexer.h"
#include "udm_robots.h"
#include "udm_db.h"
#include "udm_url.h"
#include "udm_parser.h"
#include "udm_proto.h"
#include "udm_hrefs.h"
#include "udm_mutex.h"
#include "udm_crc32.h"
#include "udm_xmalloc.h"
#include "udm_http.h"
#include "udm_host.h"
#include "udm_server.h"
#include "udm_filter.h"
#include "udm_alias.h"
#include "udm_word.h"
#include "udm_crossword.h"
#include "udm_parsehtml.h"
#include "udm_spell.h"
#include "udm_execget.h"
#include "udm_agent.h"
#include "udm_mimetype.h"
#include "udm_doc.h"
#include "udm_result.h"
#include "udm_parsedate.h"
#include "udm_unicode.h"
#include "udm_contentencoding.h"
#include "udm_vars.h"
#include "udm_guesser.h"
#include "udm_textlist.h"
#include "udm_id3.h"
#include "udm_stopwords.h"
#ifdef HAVE_ZLIB
#include "udm_store.h"
#endif

/* Call the optional thread-status callback Conf->ThreadInfo of agent A with
   state string s and message m; does nothing when no callback is installed.
   Wrapped in do/while(0) so that the macro is a single statement and cannot
   capture an "else" that follows it. A is evaluated more than once. */
#define UDM_THREADINFO(A,s,m)	do{ if((A)->Conf->ThreadInfo) (A)->Conf->ThreadInfo((A),(s),(m)); }while(0)

/***************************************************************************/

/* Number of entries in the shared href list above which UdmStoreHrefs()
   frees the list. Parenthesized so it expands safely inside expressions. */
#define MAXHSIZE	(1023*4)	/* TUNE */

/*
 * Store every not-yet-stored link of the shared Conf->Hrefs list in the
 * database, starting at index Hrefs.dhrefs.
 *
 * Each stored link is passed to UdmURLAction(UDM_URL_ACTION_ADD) through a
 * scratch document carrying "Referrer-ID", "Hops", "URL", "Tag" and
 * "Category". Links without an URL or with an URL longer than UDM_URLSIZE
 * are not sent to the database but are still marked as stored.
 *
 * Returns UDM_OK, or the first non-OK code returned by UdmURLAction().
 */
static int UdmStoreHrefs(UDM_AGENT * Indexer){
	size_t		i;
	int		res;
	UDM_DOCUMENT	Doc;
	
	UdmDocInit(&Doc);
	for(i=Indexer->Conf->Hrefs.dhrefs;i<Indexer->Conf->Hrefs.nhrefs;i++){
		UDM_HREF *H=&Indexer->Conf->Hrefs.Href[i];
		
		if(H->stored)
			continue;
		
		/* Check the pointer before measuring the string */
		if(H->url && strlen(H->url)<=UDM_URLSIZE){
			UdmVarListReplaceInt(&Doc.Sections,"Referrer-ID",H->referrer);
			UdmVarListReplaceInt(&Doc.Sections,"Hops",H->hops);
			UdmVarListReplaceStr(&Doc.Sections,"URL",H->url);
			UdmVarListReplaceStr(&Doc.Sections,"Tag",H->tag?H->tag:"");
			UdmVarListReplaceStr(&Doc.Sections,"Category",H->category?H->category:"");
			res=UdmURLAction(Indexer,&Doc,UDM_URL_ACTION_ADD,Indexer->Conf->db);
			if(res!=UDM_OK){
				/* Do not leak the scratch document on failure */
				UdmDocFree(&Doc);
				return res;
			}
		}
		H->stored=1;
	}
	UdmDocFree(&Doc);
	
	/* Remember the number of the last stored URL. */
	/* Note that it will become 0 after the next   */
	/* sort in AddUrl.                             */
	Indexer->Conf->Hrefs.dhrefs=Indexer->Conf->Hrefs.nhrefs;
	
	/* We should not free the URL list with our own database, */
	/* to avoid double indexing of the same document.         */
	/* So, do it only if compiled without HAVE_FILES.         */
	
#ifndef HAVE_FILES
	/* FIXME: this is incorrect with both SQL and built-in compiled */
	if(Indexer->Conf->Hrefs.nhrefs>MAXHSIZE)
		UdmHrefListFree(&Indexer->Conf->Hrefs);
#endif
	return UDM_OK;
}

/*
 * Build an absolute URL string in str from newURL, taking every component
 * that newURL leaves empty (schema, hostinfo, path, filename) from curURL.
 * The filename is inherited from curURL only when newURL has neither a
 * filename nor a path. "file" and "htdb" schemas are written as
 * "schema:path"; all others as "schema://hostinfo/path". For ftp:// URLs
 * containing ";type=" the string is cut at the first ";type".
 * At most UDM_URLSIZE bytes are written to str.
 */
static void RelLink(UDM_URL *curURL, UDM_URL *newURL, char *str){
	const char	*schema;
	const char	*hostinfo;
	const char	*path;
	const char	*fname;
	char		pathfile[UDM_URLSIZE + 1];
	int		local_schema;
	
	if(newURL->schema[0]){
		schema=newURL->schema;
	}else{
		schema=curURL->schema;
	}
	if(newURL->hostinfo[0]){
		hostinfo=newURL->hostinfo;
	}else{
		hostinfo=curURL->hostinfo;
	}
	if(newURL->path[0]){
		path=newURL->path;
	}else{
		path=curURL->path;
	}
	if(newURL->filename[0] || newURL->path[0]){
		fname=newURL->filename;
	}else{
		fname=curURL->filename;
	}
	
	/* Join path and file name, then normalize the result */
	snprintf(pathfile, UDM_URLSIZE, "%s%s", path, fname);
	UdmURLNormalizePath(pathfile);
	
	local_schema=(strcasecmp(schema,"file")==0)||(strcasecmp(schema,"htdb")==0);
	if(local_schema){
		snprintf(str, UDM_URLSIZE, "%s:%s", schema, pathfile);
	}else{
		snprintf(str, UDM_URLSIZE, "%s://%s%s", schema, hostinfo, pathfile);
	}
	
	/* Strip the FTP transfer type suffix */
	if(!UDM_STRNCMP(str,"ftp://") && strstr(str,";type=")){
		char *cut=strstr(str,";type");
		*cut=0;
	}
}

/*
 * Apply <BASE HREF="xxx"> of a document.
 *
 * The "base.href" section is parsed into a temporary URL first; only when
 * that succeeds is Doc->CurURL re-parsed from it, otherwise the old base
 * (Doc->CurURL) stays in effect and the problem is logged.
 * BASE is honoured only when Doc->Spider.follow is UDM_FOLLOW_WORLD. The
 * original author marks this as a dirty hack: the proper check would be
 * that the host name is the same.
 *
 * Always returns UDM_OK.
 */
static int UdmDocBaseHref(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc){
	const char	*basehref=UdmVarListFindStr(&Doc->Sections,"base.href",NULL);
	UDM_URL		baseURL;
	int		parse_res;
	
	if(!basehref)
		return UDM_OK;
	if(Doc->Spider.follow!=UDM_FOLLOW_WORLD)
		return UDM_OK;
	
	parse_res=UdmURLParse(&baseURL,basehref);
	if(!parse_res){
		/* Well formed: make it the current base URL */
		UdmURLParse(&Doc->CurURL,basehref);
		UdmLog(Indexer,UDM_LOG_ERROR,"BASE HREF '%s'",basehref);
		return UDM_OK;
	}
	
	if(parse_res==UDM_URL_LONG){
		UdmLog(Indexer,UDM_LOG_ERROR,"BASE HREF too long: '%s'",basehref);
	}else{
		/* UDM_URL_BAD and any other failure code */
		UdmLog(Indexer,UDM_LOG_ERROR,"Error in BASE HREF URL: '%s'",basehref);
	}
	return UDM_OK;
}

/*
 * Convert one link found in a document into its final, absolute form.
 *
 * Steps:
 *   1. parse Href->url and resolve it against CurURL (RelLink);
 *   2. apply the first matching ReverseAlias, if any;
 *   3. when servers are configured with subnets, resolve the host to get
 *      the dotted address used for server matching;
 *   4. find the matching Server; without one the link is marked
 *      UDM_METHOD_DISALLOW;
 *   5. run the Allow/Disallow filters and store the resulting method;
 *   6. for links that are not disallowed, replace Href->url with the
 *      converted URL and Href->tag / Href->category with the server's
 *      "Tag" / "Category" variables (NULL when unset).
 *
 * Always returns UDM_OK; the outcome is reported through Href->method.
 *
 * NOTE(review): a parse failure in step 1 is only logged and processing
 * continues with newURL; whether UdmURLParse leaves it usable in that case
 * is not visible here -- confirm.
 */
static int UdmConvertHref(UDM_AGENT *Indexer,UDM_URL *CurURL,UDM_HREF *Href){
	int		parse_res;
	UDM_URL		newURL;
	char		newhref[UDM_URLSIZE];
	UDM_ALIAS	*Alias;
	char		alias[UDM_URLSIZE+10]="";
	size_t		aliassize=sizeof(alias);
	size_t		nparts=10;
	UDM_MATCH_PART	Parts[10];
	UDM_SERVER	*Srv;
	char		reason[1024]="";
	char		subnet[32]="?.?.?.?";
	const char	*val;
	
	if((parse_res=UdmURLParse(&newURL,Href->url))){
		switch(parse_res){
			case UDM_URL_LONG:
				UdmLog(Indexer,UDM_LOG_DEBUG,"URL too long: '%s'",Href->url);
				break;
			case UDM_URL_BAD:
			default:
				UdmLog(Indexer,UDM_LOG_DEBUG,"Error in URL: '%s'",Href->url);
		}
	}
	
	RelLink(CurURL,&newURL,newhref);
	
	UdmLog(Indexer,UDM_LOG_DEBUG,"Link '%s' %s",Href->url,newhref);
	if((Alias=UdmAliasFind(&Indexer->Conf->ReverseAliases,newhref,nparts,Parts))){
		UdmAliasApply(alias,aliassize,newhref,Alias,nparts,Parts);
	}
	
	if(alias[0]){
		UdmLog(Indexer,UDM_LOG_DEBUG,"ReverseAlias: '%s'",alias);
		/* alias[] is bigger than newhref[]: the copy must be bounded */
		snprintf(newhref,sizeof(newhref),"%s",alias);
	}
	
	/* Re-parse: the reverse alias may have changed the host */
	UdmURLParse(&newURL,newhref);
	
	if (Indexer->Conf->Servers.have_subnets){
		UDM_CONN conn;
		conn.hostname=newURL.hostname;
		conn.port=80;
		
		if(UdmHostLookup(&Indexer->Conf->Hosts,&conn)!=-1){
			unsigned char * h;
			h=(unsigned char*)(&conn.sin.sin_addr);
			snprintf(subnet,sizeof(subnet)-1,"%d.%d.%d.%d",h[0],h[1],h[2],h[3]);
		}
	}
	
	if(!(Srv=UdmServerFind(&Indexer->Conf->Servers,newhref,subnet,NULL))){
		/* The format has no conversion, so no argument is passed */
		UdmLog(Indexer,UDM_LOG_DEBUG,"no Server, skip it");
		Href->method=UDM_METHOD_DISALLOW;
		return UDM_OK;
	}
	
	/* Check Allow/Disallow/CheckOnly stuff */
	Href->method=UdmFilterFind(&Indexer->Conf->Filters,newhref,reason);
	if(Href->method==UDM_METHOD_DISALLOW){
		UdmLog(Indexer,UDM_LOG_DEBUG,"%s, skip it",reason);
		return UDM_OK;
	}
	UdmLog(Indexer,UDM_LOG_DEBUG,"%s",reason);
	
	/* FIXME: add MaxHops, Robots */
	UDM_FREE(Href->url);
	UDM_FREE(Href->tag);
	UDM_FREE(Href->category);
	Href->url=strdup(newhref);
	Href->tag=(val=UdmVarListFindStr(&Srv->Vars,"Tag",NULL))?strdup(val):NULL;
	Href->category=(val=UdmVarListFindStr(&Srv->Vars,"Category",NULL))?strdup(val):NULL;
	
	return UDM_OK;
}

/*
 * Convert every link collected in Doc->Hrefs with UdmConvertHref(), using
 * Doc->CurURL as the base, and stamp each one with the document's "ID"
 * section as referrer, a hop count of the document's "Hops" plus one and
 * a cleared "stored" flag. Always returns UDM_OK.
 */
static int UdmDocConvertHrefs(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc){
	int		doc_hops=UdmVarListFindInt(&Doc->Sections,"Hops",0);
	int		doc_id=UdmVarListFindInt(&Doc->Sections,"ID",0);
	size_t		n=0;
	
	while(n<Doc->Hrefs.nhrefs){
		UDM_HREF	*Link=&Doc->Hrefs.Href[n];
		
		UdmConvertHref(Indexer,&Doc->CurURL,Link);
		Link->hops=doc_hops+1;
		Link->referrer=doc_id;
		Link->stored=0;
		n++;
	}
	return UDM_OK;
}

/*
 * Move the links of a document into the shared Conf->Hrefs list:
 * apply BASE HREF, convert all links, then add every link whose method is
 * not UDM_METHOD_DISALLOW. Always returns UDM_OK.
 */
static int UdmDocStoreHrefs(UDM_AGENT *Indexer, UDM_DOCUMENT *Doc){
	size_t n;
	
	UdmDocBaseHref(Indexer,Doc);
	UdmDocConvertHrefs(Indexer,Doc);
	
	for(n=0;n<Doc->Hrefs.nhrefs;n++){
		UDM_HREF	*Link=&Doc->Hrefs.Href[n];
		
		if(Link->method==UDM_METHOD_DISALLOW){
			continue;
		}
		UdmHrefListAdd(&Indexer->Conf->Hrefs,Link);
	}
	return UDM_OK;
}

/*********************** 'UrlFile' stuff (for -f option) *******************/

/*
 * Process a file of URLs (the -f option).
 *
 * fname is the file to read, or "-" for stdin. Trailing CR/LF are removed
 * from every line; empty lines and lines starting with '#' are skipped; a
 * line ending with a backslash is joined with the following line(s).
 * Each complete record is handled according to action:
 *   UDM_URL_FILE_REINDEX - put it into the "u" variable and expire URLs;
 *   UDM_URL_FILE_CLEAR   - put it into the "u" variable and clear the
 *                          database;
 *   UDM_URL_FILE_INSERT  - add it to the shared href list;
 *   UDM_URL_FILE_PARSE   - only check that it parses and has a schema.
 *
 * Returns UDM_OK on success. Returns UDM_ERROR when the file cannot be
 * opened, when a joined record does not fit the 1024 byte buffer, when
 * clearing fails or when a URL is invalid in PARSE mode; for REINDEX the
 * code returned by UdmURLAction() is passed through. The file is closed
 * on every path.
 *
 * Physical lines longer than the read buffer are still split by fgets().
 */
__INDLIB__ int UdmURLFile(UDM_AGENT *Indexer, const char *fname,int action){
	FILE *url_file;
	char str[1024]="";
	char str1[1024]="";
	int rc=UDM_OK;
	int result,res;
	UDM_URL myurl;
	UDM_HREF Href;
	
	if(!strcmp(fname,"-")){
		url_file=stdin;
	}else if(!(url_file=fopen(fname,"r"))){
		UdmLog(Indexer,UDM_LOG_ERROR,"Can't open URL file '%s': %s",fname,strerror(errno));
		return(UDM_ERROR);
	}
	
	while(fgets(str1,sizeof(str1),url_file)){
		char *end;
		int continued=0;
		
		if(!str1[0])continue;
		end=str1+strlen(str1)-1;
		while((end>=str1)&&(*end=='\r'||*end=='\n')){
			*end=0;if(end>str1)end--;
		}
		if(!str1[0])continue;
		if(str1[0]=='#')continue;
		
		/* A trailing backslash means "continued on the next line" */
		if(*end=='\\'){
			*end=0;
			continued=1;
		}
		
		/* Never write past the end of the record buffer */
		if(strlen(str)+strlen(str1)>=sizeof(str)){
			UdmLog(Indexer,UDM_LOG_ERROR,"Line too long in URL file '%s'",fname);
			rc=UDM_ERROR;
			goto done;
		}
		strcat(str,str1);
		if(continued)continue;
		
		switch(action){
		case UDM_URL_FILE_REINDEX:
			UdmVarListAddStr(&Indexer->Conf->Vars,"u",str);
			result=UdmURLAction(Indexer,NULL,UDM_URL_ACTION_EXPIRE,Indexer->Conf->db);
			if(result!=UDM_OK){
				rc=result;
				goto done;
			}
			UdmVarListReplaceStr(&Indexer->Conf->Vars,"u","");
			break;
		case UDM_URL_FILE_CLEAR:
			UdmVarListAddStr(&Indexer->Conf->Vars,"u",str);
			result=UdmClearDatabase(Indexer);
			if(result!=UDM_OK){
				rc=UDM_ERROR;
				goto done;
			}
			UdmVarListReplaceStr(&Indexer->Conf->Vars,"u","");
			break;
		case UDM_URL_FILE_INSERT:
			Href.url=str;
			Href.hops=0;
			Href.referrer=0;
			Href.stored=0;
			Href.tag=NULL;
			Href.category=NULL;
			Href.method=UDM_METHOD_GET;
			UdmHrefListAdd(&Indexer->Conf->Hrefs,&Href);
			break;
		case UDM_URL_FILE_PARSE:
			res=UdmURLParse(&myurl,str);
			/* A URL without schema is not acceptable here */
			if((!res)&&(!myurl.schema[0]))
				res=UDM_URL_BAD;
			if(res){
				switch(res){
				case UDM_URL_LONG:
					UdmLog(Indexer,UDM_LOG_ERROR,"URL too long: '%s'",str);
					break;
				case UDM_URL_BAD:
				default:
					UdmLog(Indexer,UDM_LOG_ERROR,"Error in URL: '%s'",str);
				}
				rc=UDM_ERROR;
				goto done;
			}
			break;
		default:
			break;
		}
		str[0]=0;
	}
	
done:
	if(url_file!=stdin)
		fclose(url_file);
	return(rc);
}




/*******************************************************************/


/*
 * Compute the non-primary alias of a document and store it in the
 * document's "Alias" section.
 *
 * If the "AliasProg" variable is configured, the external program is asked
 * first; its failure code is returned unchanged. Only when that leaves the
 * alias empty is the configured Alias list searched for the document URL.
 * A non-empty alias from either source is stored in Doc->Sections.
 *
 * Previously a non-empty AliasProg result caused an early return before
 * the alias was stored, so the result of the program was lost.
 *
 * Returns UDM_OK, or the error code of UdmAliasProg().
 */
static int UdmDocAlias(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc){
	UDM_ALIAS	*Alias;
	UDM_MATCH_PART	Parts[10];
	size_t		nparts=10;
	const char	*alias_prog=UdmVarListFindStr(&Indexer->Conf->Vars,"AliasProg",NULL);
	char		alstr[UDM_URLSIZE]="";
	int		result=UDM_OK;
	const char	*url=UdmVarListFindStr(&Doc->Sections,"URL","");
	
	if(alias_prog){
		result=UdmAliasProg(Indexer,alias_prog,url,alstr,sizeof(alstr)-1);
		UdmLog(Indexer,UDM_LOG_EXTRA,"AliasProg result: '%s'",alstr);
		if(result!=UDM_OK)return result;
	}
	
	/* Find alias when alstr is empty, i.e.       */
	/* when there is no alias in "Server" command */
	/* and no AliasProg result                    */
	if(!alstr[0] && (Alias=UdmAliasFind(&Indexer->Conf->Aliases,url,nparts,Parts))){
		UdmAliasApply(alstr,sizeof(alstr)-1,url,Alias,nparts,Parts);
	}
	if(alstr[0]){
		UdmVarListReplaceStr(&Doc->Sections,"Alias",alstr);
	}
	return UDM_OK;
}




/*
 * Decide how a document is to be handled before it is fetched.
 *
 * Logs which kind of Server entry matched, then sets Doc->method from the
 * Allow/Disallow filters. After that:
 *   - more hops than CurSrv->Spider.maxhops  -> UDM_METHOD_DISALLOW;
 *   - the host already has max_net_errors or more network errors (and the
 *     limit is non-zero) -> status "service unavailable", a
 *     "Next-Index-Time" of now + net_error_delay_time, and
 *     UDM_METHOD_VISITLATER.
 *
 * Always returns UDM_OK; the verdict is in Doc->method.
 */
static int UdmDocCheck(UDM_AGENT *Indexer,UDM_SERVER *CurSrv,UDM_DOCUMENT *Doc){
	char		why[1024]="";
	UDM_HOST_ADDR	*Known;
	int		net_errors=-1;
	int		cur_hops=UdmVarListFindInt(&Doc->Sections,"Hops",0);
	const char	*doc_url;
	
	switch(UDM_SRV_TYPE(CurSrv->match_type)){
		case UDM_SERVER_STRING:
			UdmLog(Indexer,UDM_LOG_DEBUG,"Realm string '%s'",CurSrv->url);
			break;
		case UDM_SERVER_REGEX:
			UdmLog(Indexer,UDM_LOG_DEBUG,"Realm regex '%s'",CurSrv->url);
			break;
		case UDM_SERVER_SUBNET:
			UdmLog(Indexer,UDM_LOG_DEBUG,"Subnet '%s'",CurSrv->url);
			break;
		case UDM_SERVER_SUBSTR:
		default:
			UdmLog(Indexer,UDM_LOG_DEBUG,"Server '%s'",CurSrv->url);
			break;
	}
	
	/* Allow/Disallow/CheckOnly filters */
	doc_url=UdmVarListFindStr(&Doc->Sections,"URL","");
	Doc->method=UdmFilterFind(&Indexer->Conf->Filters,doc_url,why);
	UdmLog(Indexer,UDM_LOG_DEBUG,"%s",why);
	if(Doc->method==UDM_METHOD_DISALLOW){
		return UDM_OK;
	}
	
	/* MaxHops limit */
	if(cur_hops>CurSrv->Spider.maxhops){
		UdmLog(Indexer,UDM_LOG_WARN,"Too many hops (%d)",cur_hops);
		Doc->method=UDM_METHOD_DISALLOW;
		return UDM_OK;
	}
	
	/* Network error count of this host; stays -1 for an unknown host */
	Known=UdmHostFind(&Indexer->Conf->Hosts,Doc->CurURL.hostname);
	if(Known){
		net_errors=Known->net_errors;
	}
	
	/* A limit of zero disables the check */
	if((CurSrv->Spider.max_net_errors)&&(net_errors>=CurSrv->Spider.max_net_errors)){
		size_t	retry_at=time(NULL)+CurSrv->Spider.net_error_delay_time;
		char	when[64];
		
		UdmLog(Indexer,UDM_LOG_WARN,"Too many network errors (%d) for this server",net_errors);
		UdmVarListReplaceInt(&Doc->Sections,"Status",UDM_HTTP_STATUS_SERVICE_UNAVAILABLE);
		UdmTime_t2HttpStr((int)retry_at, when);
		UdmVarListReplaceStr(&Doc->Sections,"Next-Index-Time",when);
		Doc->method=UDM_METHOD_VISITLATER;
	}
	
	return UDM_OK;
}


/*
 * Post-process the response headers stored in Doc->Sections.
 *
 *  - When "UseCRC32URLID" is "yes" or there is no "Content-Type" section,
 *    derive Content-Type from the file name of the URL ("index.html" when
 *    the URL has no file name) via the configured mime types.
 *  - When "force1251" is "yes" and the "Server" header starts with
 *    "Microsoft" or "IIS", force the charset to windows-1251.
 *  - Cut a "charset=" part off Content-Type into the "Charset" section
 *    and trim trailing "; " from Content-Type.
 *  - For every "Location" section holding a well-formed URL with a
 *    schema, add that URL to Doc->Hrefs; malformed ones are logged.
 *
 * Always returns UDM_OK.
 */
static int UdmDocProcessResponseHeaders(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc){
	UDM_VAR	*hdr;
	size_t	n;
	int	guess_type;
	
	/* Second test is evaluated only when the first one is false */
	guess_type=!strcasecmp(UdmVarListFindStr(&Indexer->Conf->Vars,"UseCRC32URLID","no"),"yes");
	if(!guess_type){
		guess_type=!UdmVarListFind(&Doc->Sections,"Content-Type");
	}
	if(guess_type){
		const char *mime;
		const char *fname="index.html";
		
		if(Doc->CurURL.filename && strlen(Doc->CurURL.filename)){
			fname=Doc->CurURL.filename;
		}
		mime=UdmContentType(&Indexer->Conf->MimeTypes,fname);
		if(mime){
			UdmVarListReplaceStr(&Doc->Sections,"Content-Type",mime);
		}
	}
	
	hdr=UdmVarListFind(&Doc->Sections,"Server");
	if(hdr && !strcasecmp("yes",UdmVarListFindStr(&Indexer->Conf->Vars,"force1251","no"))){
		if (!UDM_STRNCASECMP(hdr->val,"Microsoft")||!UDM_STRNCASECMP(hdr->val,"IIS")){
			const char *forced=UdmCharsetCanonicalName("windows-1251");
			if(forced){
				UdmVarListReplaceStr(&Doc->Sections,"Charset",forced);
			}
		}
	}
	
	hdr=UdmVarListFind(&Doc->Sections,"Content-Type");
	if(hdr){
		char *cs_part=strstr(hdr->val,"charset=");
		
		/* Cut charset part from Content-Type */
		if(cs_part){
			const char *canon = UdmCharsetCanonicalName(cs_part+8);
			*cs_part = '\0';
			UdmVarListAddStr(&Doc->Sections,"Charset",canon?canon:(cs_part+8));
			/* reread hdr, because of realloc in UdmVarListAddStr */
			hdr = UdmVarListFind(&Doc->Sections, "Content-Type");
		}
		UdmRTrim(hdr->val, "; ");
	}
	
	for(n=0;n<Doc->Sections.nvars;n++){
		UDM_URL target;
		
		hdr=&Doc->Sections.Var[n];
		if(strcasecmp(hdr->name,"Location")){
			continue;
		}
		
		switch(UdmURLParse(&target,hdr->val)){
			case UDM_URL_OK:
				if(target.schema[0]){
					UDM_HREF Redirect;
					Redirect.url=hdr->val;
					Redirect.hops=UdmVarListFindInt(&Doc->Sections,"Hops",0)+1;
					Redirect.referrer=UdmVarListFindInt(&Doc->Sections,"Referrer-ID",0);
					Redirect.stored=0;
					Redirect.tag=NULL;
					Redirect.category=NULL;
					Redirect.method=UDM_METHOD_GET;
					UdmHrefListAdd(&Doc->Hrefs,&Redirect);
				}
				break;
			case UDM_URL_LONG:
				UdmLog(Indexer,UDM_LOG_ERROR,"Redirect URL too long: '%s'",hdr->val);
				break;
			case UDM_URL_BAD:
			default:
				UdmLog(Indexer,UDM_LOG_ERROR,"Error in redirect URL: '%s'",hdr->val);
		}
	}
	return UDM_OK;
}


#if 0	
	/* If mirroring is enabled */
	if ((mirror_period=UdmVarListFindInt(&Doc->Sections,"MirrorPeriod",-1)) >= 0) {
		char errstr[1024]="";
		int  mres;
		
		/* on u_m==0 it returned by mtime from mirrorget */
		/* but we knew that it should be put in mirror  */
		
		mres = UdmMirrorGET(Indexer,Doc,&Doc->CurURL,errstr);
		
		if(!mres){
			UdmLog(Indexer,UDM_LOG_DEBUG,"Taken from mirror");
			found_in_mirror=1;
		}else{
			UdmLog(Indexer,UDM_LOG_DEBUG,"MirrorGet: %s",errstr);
		}
	}
	
	/* Put into mirror if required */
	if ((mirror_period>=0)&&(!found_in_mirror)){
		char errstr[1024]="";
		if(UdmMirrorPUT(Indexer,Doc,&Doc->CurURL,errstr)){
			UdmLog(Indexer,UDM_LOG_DEBUG,"MirrorPUT: %s",errstr);
			result=UDM_ERROR;
		}
	}

#endif



/*
 * Decode and parse the body of a fetched document.
 *
 *  1. Without a "Content-Type" section, "application/octet-stream" is
 *     stored and used (the local copy used to be misspelled
 *     "application/octet-strem").
 *  2. robots.txt is not parsed here.
 *  3. With zlib support, gzip/deflate/compress Content-Encodings are
 *     decoded and "Content-Length" is updated; any other non-identity
 *     encoding is logged and sets the "unsupported media type" status.
 *  4. With USE_PARSER, an external parser registered for the content type
 *     is run; its target mime type (and charset, if given) is used.
 *  5. With zlib support and a configured "StoredAddr", text documents are
 *     sent to the store.
 *  6. The effective type is saved as "Parser-Content-Type" and the body
 *     is handed to the text or HTML parser; for unknown types only the
 *     CRC32 of the content (without headers) is recorded.
 *
 * Returns UDM_OK.
 */
static int UdmDocParseContent(UDM_AGENT * Indexer, UDM_DOCUMENT * Doc){
	
#ifdef USE_PARSER
	UDM_PARSER	*Parser;
#endif
	const char	*real_content_type=NULL;
	const char	*content_type=NULL;
	const char	*url;
	const char	*ce;
	int		result=UDM_OK;
	
	
	if(!(content_type=UdmVarListFindStr(&Doc->Sections,"Content-Type",NULL))) {
		UdmVarListAddStr(&Doc->Sections,"Content-Type","application/octet-stream");
		content_type="application/octet-stream";
	}
	if(!strcmp(Doc->CurURL.filename,"robots.txt"))return UDM_OK;
	url=UdmVarListFindStr(&Doc->Sections,"URL","");
	
	ce=UdmVarListFindStr(&Doc->Sections,"Content-Encoding","");
	
#ifdef HAVE_ZLIB
	if(!strcasecmp(ce,"gzip") || !strcasecmp(ce,"x-gzip")){
		UDM_THREADINFO(Indexer,"UnGzip",url);
		UdmUnGzip(Doc);
		UdmVarListReplaceInt(&Doc->Sections, "Content-Length", Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size);
	}else
	if(!strcasecmp(ce,"deflate")){
		UDM_THREADINFO(Indexer,"Inflate",url);
		UdmInflate(Doc);
		UdmVarListReplaceInt(&Doc->Sections, "Content-Length", Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size);
	}else
	if(!strcasecmp(ce,"compress") || !strcasecmp(ce,"x-compress")){
		UDM_THREADINFO(Indexer,"Uncompress",url);
		UdmUncompress(Doc);
		UdmVarListReplaceInt(&Doc->Sections, "Content-Length", Doc->Buf.buf - Doc->Buf.content + (int)Doc->Buf.size);
	}else
#endif	
	if(!strcasecmp(ce,"identity") || !strcasecmp(ce,"")){
		/* Nothing to do*/
	}else{
		UdmLog(Indexer,UDM_LOG_ERROR,"Unsupported Content-Encoding");
		UdmVarListReplaceInt(&Doc->Sections,"Status",UDM_HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE);
	}
	
	
#ifdef USE_PARSER
	/* Let's try to start external parser for this Content-Type */
	
	if((Parser=UdmParserFind(&Indexer->Conf->Parsers,content_type))){
		UdmLog(Indexer,UDM_LOG_DEBUG,"Found external parser '%s' -> '%s'",
			Parser->from_mime?Parser->from_mime:"NULL",
			Parser->to_mime?Parser->to_mime:"NULL");
	}
	if(Parser && UdmParserExec(Indexer,Parser,Doc)){
		const char * to_charset;
		real_content_type=Parser->to_mime?Parser->to_mime:"unknown";
		UdmLog(Indexer,UDM_LOG_DEBUG,"Parser-Content-Type: %s",real_content_type);
		if((to_charset=strstr(real_content_type,"charset="))){
			to_charset+=8;
			UdmVarListReplaceStr(&Doc->Sections,"Charset",to_charset);
			UdmLog(Indexer,UDM_LOG_DEBUG,"to_charset='%s'",to_charset);
		}
#ifdef DEBUG_PARSER
		fprintf(stderr,"content='%s'\n",Doc->content);
#endif
	}
#endif
	
	
#ifdef HAVE_ZLIB
	if ( UdmVarListFindStr(&Indexer->Conf->Vars,"StoredAddr",NULL) && 
	     (strncmp(UdmVarListFindStr(&Doc->Sections,"Content-Type",""), "text/", 5) == 0)) 
	{
	  UdmStoreDoc(Indexer, Doc);
	}
#endif
	
	/* Without an external parser the original type is the real one */
	if(!real_content_type)real_content_type=content_type;
	UdmVarListAddStr(&Doc->Sections,"Parser-Content-Type",real_content_type);
	
	/* The body is parsed whatever Doc->method is */
	if(!UDM_STRNCASECMP(real_content_type,"text/plain") ||
	   !UDM_STRNCASECMP(real_content_type,"text/tab-separated-values") ||
	   !UDM_STRNCASECMP(real_content_type,"text/css")){
		UdmParseText(Indexer,Doc);
	}else
	if(!UDM_STRNCASECMP(real_content_type,"text/html")){
		UdmHTMLParse(Indexer,Doc);
	}else
	if(Doc->is_mp3){
		/* FIXME: Dirty hack: switch to native MP3 parser */
		UdmHTMLParse(Indexer,Doc);
	}else{
		/* Unknown Content-Type  */
		/* CRC32 without headers */
		size_t crclen=Doc->Buf.size - (Doc->Buf.content-Doc->Buf.buf);
		UdmVarListReplaceInt(&Doc->Sections,"crc32",UdmCRC32(Doc->Buf.content,crclen));
	}
	return result;
}



/*
 * Add the request headers that come from the global configuration:
 *   - "Host" (with ":port" when the URL has an explicit port), when the
 *     current URL has a host name;
 *   - every user-defined header in Conf->ExtraHeaders;
 *   - "Accept-Charset" built from "LocalCharset", when that is set;
 *   - "Accept-Encoding", when built with zlib.
 * Always returns UDM_OK.
 */
static int UdmDocAddConfExtraHeaders(UDM_ENV *Conf,UDM_DOCUMENT *Doc){
	char		arg[128]="";
	size_t		i;
	const char	*lc;
	
	if(Doc->CurURL.hostname[0]){
		/* Host Name for virtual hosts */
		if(Doc->CurURL.port){
			/* Bounded: a long host name must not overflow arg[] */
			snprintf(arg,sizeof(arg)-1,"%s:%d",Doc->CurURL.hostname,Doc->CurURL.port);
			arg[sizeof(arg)-1]='\0';
			UdmVarListAddStr(&Doc->RequestHeaders,"Host",arg);
		}else{
			UdmVarListAddStr(&Doc->RequestHeaders,"Host",Doc->CurURL.hostname);
		}
	}
	
	/* Add Conf user defined headers */
	for( i=0 ; i<Conf->ExtraHeaders.nvars ; i++){
		UDM_VAR *Hdr=&Conf->ExtraHeaders.Var[i];
		UdmVarListAddStr(&Doc->RequestHeaders,Hdr->name,Hdr->val);
	}
	
	/* If LocalCharset specified, add Accept-Charset header */
	if((lc=UdmVarListFindStr(&Conf->Vars,"LocalCharset",NULL))){
		snprintf(arg,sizeof(arg)-1,"%s;q=1.0, *;q=0.9, utf-8;q=0.8",lc);
		arg[sizeof(arg)-1]='\0';
		UdmVarListAddStr(&Doc->RequestHeaders,"Accept-Charset",arg);
	}
	
#ifdef HAVE_ZLIB
	UdmVarListAddStr(&Doc->RequestHeaders,"Accept-Encoding","gzip,deflate,compress");
#endif
	return UDM_OK;
}

/*
 * Add the request headers configured for a Server entry.
 *
 * "Authorization" is sent as "Basic <value>" for http, https and ftp
 * URLs and as the raw (still base64) value for nntp and news URLs, where
 * NNTPGet parses it; keeping it encoded avoids a plain user name in a core
 * file. "Proxy-Authorization" is always sent as "Basic <value>". Both are
 * skipped when the value is empty. Every other header is copied as is.
 * Always returns UDM_OK.
 */
static int UdmDocAddServExtraHeaders(UDM_SERVER *Server,UDM_DOCUMENT *Doc){
	char	arg[128]="";
	size_t	n;
	
	for(n=0;n<Server->ExtraHeaders.nvars;n++){
		UDM_VAR		*Hdr=&Server->ExtraHeaders.Var[n];
		const char	*schema=Doc->CurURL.schema;
		int		has_value=(Hdr->val && Hdr->val[0]);
		
		if(!strcasecmp(Hdr->name,"Proxy-Authorization")){
			if(has_value){
				snprintf(arg,sizeof(arg)-1,"Basic %s\r\n",Hdr->val);
				arg[sizeof(arg)-1]='\0';
				UdmVarListAddStr(&Doc->RequestHeaders,"Proxy-Authorization",arg);
			}
			continue;
		}
		
		if(strcasecmp(Hdr->name,"Authorization")){
			/* Ordinary header: pass through */
			UdmVarListAddStr(&Doc->RequestHeaders,Hdr->name,Hdr->val);
			continue;
		}
		
		/* HTTP and FTP specific stuff */
		if(!strcasecmp(schema,"http") ||
		   !strcasecmp(schema,"https") ||
		   !strcasecmp(schema,"ftp")){
			if(has_value){
				snprintf(arg,sizeof(arg)-1,"Basic %s\r\n",Hdr->val);
				arg[sizeof(arg)-1]='\0';
				UdmVarListAddStr(&Doc->RequestHeaders,"Authorization",arg);
			}
		}
		
		/* News: authinfo stays in base64 form */
		if(!strcasecmp(schema,"nntp") || !strcasecmp(schema,"news")){
			if(has_value){
				UdmVarListAddStr(&Doc->RequestHeaders,"Authorization",Hdr->val);
			}
		}
	}
	return UDM_OK;
}


/*
 * Fill Doc->connp with the host and port to connect to and resolve it.
 *
 * With a "Proxy" request header ("host[:port]") the proxy is the peer and
 * the port defaults to 3128; otherwise the peer is the host of the
 * current URL with its explicit or default port. When the URL has a host
 * name and the lookup fails, the document is marked UDM_METHOD_DISALLOW
 * with a "not found" status. Always returns UDM_OK.
 *
 * NOTE(review): the strdup() result is neither checked nor is a previous
 * Doc->connp.hostname released here.
 */
static int UdmDocLookupConn(UDM_AGENT *Indexer,UDM_DOCUMENT *Doc){
	const char *proxy=UdmVarListFindStr(&Doc->RequestHeaders,"Proxy",NULL);
	
	if(!proxy){
		/* Direct connection */
		Doc->connp.hostname=strdup(Doc->CurURL.hostname);
		if(Doc->CurURL.port){
			Doc->connp.port=Doc->CurURL.port;
		}else{
			Doc->connp.port=Doc->CurURL.default_port;
		}
	}else{
		char *colon;
		
		Doc->connp.hostname=strdup(proxy);
		colon=strchr(Doc->connp.hostname,':');
		if(colon){
			/* Split "host:port" in place */
			*colon='\0';
			Doc->connp.port=atoi(colon+1);
		}else{
			Doc->connp.port=3128;
		}
	}
	
	if(Doc->CurURL.hostname[0] && UdmHostLookup(&Indexer->Conf->Hosts,&Doc->connp)){
		UdmLog(Indexer,UDM_LOG_WARN,"Can't resolve host '%s'",Doc->connp.hostname);
		/* delete, the host may be mistyped or no longer exist */
		Doc->method = UDM_METHOD_DISALLOW;
		UdmVarListReplaceInt(&Doc->Sections,"Status",UDM_HTTP_STATUS_NOT_FOUND);
	}
	return UDM_OK;
}



/*
 * Hand out the next document to index.
 *
 * Returns UDM_NOTARGET when Conf->url_number is exhausted or no target
 * rows are available. When the in-memory target cache is used up it is
 * reloaded through UdmResAction(UDM_RES_ACTION_TARGETS); a failure of that
 * call is returned as is. On success the sections of the chosen target
 * and the configured Conf->Sections are added to Result, the cache cursor
 * advances, url_number is decremented and UDM_OK is returned.
 */
static int UdmNextTarget(UDM_AGENT * Indexer,UDM_DOCUMENT *Result,void *db){
	UDM_DOCUMENT	*Target;
	size_t		n;
	
	if (Indexer->Conf->url_number<=0){
		return UDM_NOTARGET;
	}
	
	/* Load targets into memory cache */
	if(Indexer->Conf->Targets.cur_row>=Indexer->Conf->Targets.num_rows){
		int rc;
		
		UdmResultFree(&Indexer->Conf->Targets);
		rc=UdmResAction(Indexer,&Indexer->Conf->Targets,UDM_RES_ACTION_TARGETS,db);
		if(rc!=UDM_OK){
			return rc;
		}
	}
	
	if(!Indexer->Conf->Targets.num_rows){
		return UDM_NOTARGET;
	}
	
	/* Choose next target */
	Target=&Indexer->Conf->Targets.Doc[Indexer->Conf->Targets.cur_row];
	for(n=0;n<Target->Sections.nvars;n++){
		UdmVarListAdd(&Result->Sections,&Target->Sections.Var[n]);
	}
	for(n=0;n<Indexer->Conf->Sections.nvars;n++){
		UdmVarListAdd(&Result->Sections,&Indexer->Conf->Sections.Var[n]);
	}
	Indexer->Conf->Targets.cur_row++;
	Indexer->Conf->url_number--;
	return UDM_OK;
}




/*
 * Index the next pending URL.
 *
 * Outline:
 *   1. store links collected so far and select the next target;
 *   2. under the Conf lock: allocate the document buffer, validate the
 *      URL, find its Server, add request headers, run UdmDocCheck() and
 *      determine the alias;
 *   3. for http(s) URLs fetch and apply robots.txt (the alias URL is used
 *      for the check and for fetching, the original URL is restored
 *      afterwards);
 *   4. fetch the document, for MP3 check methods over http first with
 *      "Range" requests to look for an ID3 tag;
 *   5. store links from the response, parse the content, guess
 *      language/charset, prepare words and zero out stop words and words
 *      outside the configured length limits;
 *   6. store the document's links and flush the document to the database.
 *
 * Returns UDM_NOTARGET when there is nothing left to index (after a
 * successful flush), UDM_ERROR when a buffer cannot be allocated, and
 * otherwise the result of the last failing step or of the final flush.
 *
 * Fixes over the previous version: both malloc() results are checked, the
 * response size is logged with a matching conversion, and a missing "URL"
 * section can no longer pass NULL to strncpy().
 */
__INDLIB__ int UdmIndexNextURL(UDM_AGENT *Indexer){
	int		result=UDM_OK;
	UDM_DOCUMENT	Doc;
	const char	*url, *alias;
	char		origurl[UDM_URLSIZE]="";
	char		aliasurl[UDM_URLSIZE]="";
	
	UdmDocInit(&Doc);
	
	UDM_THREADINFO(Indexer,"Selecting","");
	UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
	if(UDM_OK==(result=UdmStoreHrefs(Indexer)))
		result=UdmNextTarget(Indexer,&Doc,Indexer->Conf->db);
	UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
	
	if(result==UDM_NOTARGET){
		int rc;
		UdmDocFree(&Doc);	/* To free Doc.connp->connp */
		if(UDM_OK!=(rc=UdmURLAction(Indexer,NULL,UDM_URL_ACTION_FLUSH,Indexer->Conf->db)))
			return rc;
		else
			return result;
	}
	
	if(result!=UDM_OK){
		UdmDocFree(&Doc);
		return result;
	}
	
	url=UdmVarListFindStr(&Doc.Sections,"URL","");
	/* Keep the previous checksum so a changed document can be detected */
	UdmVarListReplaceInt(&Doc.Sections,"crc32old",UdmVarListFindInt(&Doc.Sections,"crc32",0));
	UdmLog(Indexer,UDM_LOG_INFO,"URL: %s",url);
	
#ifdef HAVE_SETPROCTITLE
	/* To see the URL being indexed in "ps" output on FreeBSD */
	/* Do it if single thread version */
	if(!(Indexer->handle)) setproctitle("%s",url);
#endif
	
	
	/* Collect information from Conf */
	UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
	
	if(!Doc.Buf.buf){
		/* Alloc buffer for document */
		Doc.Buf.maxsize=(size_t)UdmVarListFindInt(&Indexer->Conf->Vars,"MaxDocSize",UDM_MAXDOCSIZE);
		Doc.Buf.buf=(char*)malloc(Doc.Buf.maxsize + 1);
		if(!Doc.Buf.buf){
			UdmLog(Indexer,UDM_LOG_ERROR,"Can't allocate document buffer for '%s'",url);
			UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
			UdmDocFree(&Doc);
			return UDM_ERROR;
		}
		Doc.Buf.buf[0]='\0';
	}
	
	/* Check that URL has valid syntax */
	if(UdmURLParse(&Doc.CurURL,url)){
		UdmLog(Indexer,UDM_LOG_WARN,"Invalid URL: %s",url);
		Doc.method = UDM_METHOD_DISALLOW;
	}else
	if (!strcmp(Doc.CurURL.filename,"robots.txt")) {
		Doc.method = UDM_METHOD_DISALLOW;
	}else{
		UDM_SERVER	*Server;
		UDM_CONN	conn;
		char		subnet[32];
		char		alstr[UDM_URLSIZE]="";
		
		conn.hostname=Doc.CurURL.hostname;
		conn.port=80;
		if(UdmHostLookup(&Indexer->Conf->Hosts,&conn)!=-1){
			unsigned char * h;
			h=(unsigned char*)(&conn.sin.sin_addr);
			snprintf(subnet,sizeof(subnet)-1,"%d.%d.%d.%d",h[0],h[1],h[2],h[3]);
		}else{
			strcpy(subnet,"?.?.?.?");
		}
		
		/* Find correspondent Server */
		if(!(Server=UdmServerFind(&Indexer->Conf->Servers,url,subnet,alstr)) ) {
			UdmLog(Indexer,UDM_LOG_WARN,"No 'Server' command for url");
			Doc.method = UDM_METHOD_DISALLOW;
		}else{
			size_t	i;
			
			Doc.lcs=Indexer->Conf->lcs;
			Doc.Spider=Server->Spider;
			UdmDocAddServExtraHeaders(Server,&Doc);
			UdmDocAddConfExtraHeaders(Indexer->Conf,&Doc);
			for(i=0;i<Server->Vars.nvars;i++)
				UdmVarListAdd(&Doc.Sections,&Server->Vars.Var[i]);
			
			/* Check hops, network errors, filters */
			result=UdmDocCheck(Indexer,Server,&Doc);
			
			if(alstr[0]){
				/* Server Primary alias found */
				UdmVarListReplaceStr(&Doc.Sections,"Alias",alstr);
			}else{
				/* Apply non-primary alias */
				result=UdmDocAlias(Indexer,&Doc);
			}
		}
	}
	UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
	
	
	if(result!=UDM_OK){
		UdmDocFree(&Doc);
		return result;
	}
	
	/* Remember both the original and the alias URL: the alias is used */
	/* for fetching, the original one is what gets stored              */
	if((alias=UdmVarListFindStr(&Doc.Sections,"Alias",NULL))){
		const char *u=UdmVarListFindStr(&Doc.Sections,"URL","");
		strncpy(origurl,u,sizeof(origurl)-1);
		origurl[sizeof(origurl)-1]='\0';
		strncpy(aliasurl,alias,sizeof(aliasurl)-1);
		aliasurl[sizeof(aliasurl)-1]='\0';
		UdmLog(Indexer,UDM_LOG_EXTRA,"Alias: '%s'",alias);
	}
	
	
	if(Doc.method!=UDM_METHOD_DISALLOW && Doc.method!=UDM_METHOD_VISITLATER){
		if(aliasurl[0]){
			UdmVarListReplaceStr(&Doc.Sections,"URL",alias);
			UdmURLParse(&Doc.CurURL,alias);
		}
		if(!strcmp(Doc.CurURL.schema,"https") || !strncmp(Doc.CurURL.schema,"http",4)){
			if(!Doc.Spider.use_robots){
				UdmLog(Indexer,UDM_LOG_WARN,"robots.txt support is disallowed for '%s'",Doc.CurURL.hostinfo);
				result=UdmRobotParse(&Indexer->Conf->Robots,NULL,Doc.CurURL.hostinfo);
			}else{
				UDM_ROBOT_RULE	*rule;
				int		take_robots;
				
				UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
				
				/* Fetch robots.txt only for hosts not seen yet */
				take_robots=!UdmRobotFind(&Indexer->Conf->Robots,Doc.CurURL.hostinfo);
				
				if(take_robots){
					char		rurl[UDM_URLSIZE];
					UDM_DOCUMENT	rDoc;
					int		status;
					
					UdmDocInit(&rDoc);
					rDoc.Buf.maxsize=Doc.Buf.maxsize;
					rDoc.Spider=Doc.Spider;
					rDoc.Buf.buf=(char*)malloc(Doc.Buf.maxsize + 1);
					if(!rDoc.Buf.buf){
						UdmLog(Indexer,UDM_LOG_ERROR,"Can't allocate robots.txt buffer for '%s'",Doc.CurURL.hostinfo);
						UdmDocFree(&rDoc);
						UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
						UdmDocFree(&Doc);
						return UDM_ERROR;
					}
					rDoc.Buf.buf[0]='\0';
					
					snprintf(rurl,sizeof(rurl)-1,"%s://%s/robots.txt", Doc.CurURL.schema, Doc.CurURL.hostinfo);
					rurl[sizeof(rurl)-1]='\0';
					UdmVarListAddStr(&rDoc.Sections,"URL",rurl);
					UdmURLParse(&rDoc.CurURL,rurl);
					
					if(rDoc.CurURL.hostname[0]){
						char		arg[128]="";
						/* Host Name for virtual hosts */
						if(rDoc.CurURL.port){
							snprintf(arg,sizeof(arg)-1, "%s:%d", rDoc.CurURL.hostname, rDoc.CurURL.port);
							arg[sizeof(arg)-1]='\0';
							UdmVarListAddStr(&rDoc.RequestHeaders, "Host", arg);
						}else{
							UdmVarListAddStr(&rDoc.RequestHeaders, "Host", rDoc.CurURL.hostname);
						}
					}
					
					UDM_THREADINFO(Indexer,"Getting",rurl);
					
					UdmDocLookupConn(Indexer,&rDoc);
					result=UdmGetURL(Indexer,&rDoc);
					
					UdmParseHTTPResponse(Indexer,&rDoc);
					UdmDocProcessResponseHeaders(Indexer,&rDoc);
					
					status=UdmVarListFindInt(&rDoc.Sections,"Status",0);
					UdmLog(Indexer,UDM_LOG_INFO,"Get: %s (%d)",rurl,status);
					
					/* No status at all: try this document later */
					if(status == 0) {
						Doc.method = UDM_METHOD_VISITLATER;
					}
					
					UdmRobotAddEmpty(&Indexer->Conf->Robots, Doc.CurURL.hostinfo);
					result=UdmRobotParse(&Indexer->Conf->Robots,rDoc.Buf.content,rDoc.CurURL.hostinfo);
					UdmDocFree(&rDoc);
				}
				
				/* Check whether URL is disallowed by robots.txt */
				if((rule=UdmRobotRuleFind(&Indexer->Conf->Robots,&Doc.CurURL))){
					UdmLog(Indexer,UDM_LOG_WARN,"robots.txt: '%s %s'",(rule->cmd==UDM_METHOD_DISALLOW)?"Disallow":"Allow",rule->path);
					if(rule->cmd==UDM_METHOD_DISALLOW)
						Doc.method=rule->cmd;
				}
				
				UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
			}
		}
		if(origurl[0]){
			UdmVarListReplaceStr(&Doc.Sections,"URL",origurl);
			UdmURLParse(&Doc.CurURL,origurl);
		}
	}
	
	
	if(result!=UDM_OK){
		UdmDocFree(&Doc);
		return result;
	}
	
	if(Doc.method!=UDM_METHOD_DISALLOW && Doc.method!=UDM_METHOD_VISITLATER){
		int	start,state,status=0;
		
		if(!(Indexer->flags&UDM_FLAG_REINDEX)){
			const char *l=UdmVarListFindStr(&Doc.Sections,"Last-Modified",NULL);
			if(l)UdmVarListAddStr(&Doc.RequestHeaders,"If-Modified-Since",l);
		}
		
		/* MP3 checks over http start with two "Range" requests   */
		/* (state 2 and 1) before the full request (state 0)      */
		start=(Doc.method==UDM_METHOD_CHECKMP3 || 
		       Doc.method==UDM_METHOD_CHECKMP3ONLY) &&
		     (!strcasecmp(Doc.CurURL.schema,"http"))?2:0;
		
		for(state=start;state>=0;state--){
			char		hdr[]="Range";
			char		arg2[]="bytes=0-256";
			char		arg1[]="bytes=-128";
			
			UdmVarListEnter(&Doc.RequestHeaders);
			
			/* Add new "Range" header */
			switch(state){
			case 2:
				UdmVarListAddStr(&Doc.RequestHeaders,hdr,arg2);
				UdmLog(Indexer,UDM_LOG_INFO,"URL: [%s] %s",arg2,url);
				break;
			case 1:
				UdmVarListAddStr(&Doc.RequestHeaders,hdr,arg1);
				UdmLog(Indexer,UDM_LOG_INFO,"URL: [%s] %s",arg1,url);
				break;
			default:
				break;
			}
			
			UDM_THREADINFO(Indexer,"Getting",url);
			UdmVarListReplaceInt(&Doc.Sections, "Status", UDM_HTTP_STATUS_UNKNOWN);
			
			if(aliasurl[0]){
				UdmVarListReplaceStr(&Doc.Sections,"URL",alias);
				UdmURLParse(&Doc.CurURL,alias);
			}
			
			if(!strcasecmp(Doc.CurURL.schema,"htdb")){
				UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
				result=UdmURLAction(Indexer,&Doc,UDM_URL_ACTION_HTDBGET,Indexer->Conf->db);
				UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
			}else{
				UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
				UdmDocLookupConn(Indexer,&Doc);
				UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
				result=UdmGetURL(Indexer,&Doc);
			}
			
			if(origurl[0]){
				UdmVarListReplaceStr(&Doc.Sections,"URL",origurl);
				UdmURLParse(&Doc.CurURL,origurl);
			}
			
			if(result!=UDM_OK){
				UdmDocFree(&Doc);
				return result;
			}
			
			UdmParseHTTPResponse(Indexer,&Doc);
			UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
			UdmDocProcessResponseHeaders(Indexer,&Doc);
			UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
			
			UdmVarListLeave(&Doc.RequestHeaders);
			
			status=UdmVarListFindInt(&Doc.Sections,"Status",0);
			
			/* Buf.size is cast: "%d" expects an int */
			UdmLog(Indexer,UDM_LOG_EXTRA,"Status: %d %s, Content-type: %s, Response size: %d, Content-Length: %d", 
				status, UdmHTTPErrMsg(status),
				UdmVarListFindStr(&Doc.Sections,"Content-Type","NULL"),
				(int)Doc.Buf.size,
				UdmVarListFindInt(&Doc.Sections,"Content-Length",0));
			
			
			if(status!=206 && status!=200)
				break;
			
			if(state>0){
				char	*mp3html;
				
				if((mp3html=get_id3_tag(&Doc))){
					strcpy(Doc.Buf.content,mp3html);
					free(mp3html);
					break;
				}
				
				/* Not an MP3: the loop's state-- then ends the loop */
				if(!Doc.is_mp3){
					state=0;
					continue;
				}
			}
		}
		
		/* Add URL from Location: header */
		/* This is to give a chance for  */
		/* a concurrent thread to take it */
		UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
		result=UdmDocStoreHrefs(Indexer,&Doc);
		UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
		if(result!=UDM_OK){
			UdmDocFree(&Doc);
			return result;
		}
		
		/* Increment indexer's download statistics */
		Indexer->nbytes+=Doc.Buf.size;
		
		if((!Doc.Buf.content) && (status<500)) {
			UdmLog(Indexer,UDM_LOG_ERROR,"Illegal HTTP headers in response");
			status=UDM_HTTP_STATUS_SERVICE_UNAVAILABLE;
			UdmVarListReplaceInt(&Doc.Sections,"Status",status);
		}
		
		if(status==UDM_HTTP_STATUS_OK || status==UDM_HTTP_STATUS_PARTIAL_OK){
		   	size_t		wordnum;
		   	
			UDM_THREADINFO(Indexer,"Parsing",url);
			
			result=UdmDocParseContent(Indexer,&Doc);
			if(result!=UDM_OK){
				UdmDocFree(&Doc);
				return result;
			}
			
			/* Guess language and charset from the collected text */
			UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
			if(Indexer->Conf->LangMaps.nmaps){
				size_t t;
				bzero(Indexer->LangMap, sizeof(UDM_LANGMAP));
				
				for(t=0;t<Doc.TextList.nitems;t++){
					UDM_TEXTITEM *Item=&Doc.TextList.Item[t];
					UdmBuildLangMap(Indexer->LangMap,Item->str,strlen(Item->str),0);
				}
				
				UdmGuessCharSet(&Doc,&Indexer->Conf->LangMaps,Indexer->LangMap);
				
				UdmLog(Indexer, UDM_LOG_EXTRA, "Guesser: Lang: %s, Charset: %s",
					UdmVarListFindStr(&Doc.Sections,"Content-Language",""),
					UdmVarListFindStr(&Doc.Sections,"Charset",""));
			}
			UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
			
			UdmParseURLText(Indexer,&Doc);
			UdmParseHeaders(Indexer,&Doc);
			if (Doc.method!=UDM_METHOD_HREFONLY)
				UdmPrepareWords(Indexer,&Doc);
			
			/* Remove StopWords: words that are too long, too short */
			/* or in the stop list get a zero coord / weight        */
			UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
			for(wordnum=0;wordnum<Doc.Words.nwords;wordnum++){
				const char	*w=Doc.Words.Word[wordnum].word;
				UDM_STOPWORD	*sw;
				size_t		wlen=strlen(w);
				
				if(wlen>Indexer->Conf->WordParam.max_word_len ||
				   wlen<Indexer->Conf->WordParam.min_word_len ||
				   (sw=UdmStopListFind(&Indexer->Conf->StopWords,w)))
				{
					Doc.Words.Word[wordnum].coord=0;
				}	
			}
			for(wordnum=0;wordnum<Doc.CrossWords.ncrosswords;wordnum++){
				const char	*w=Doc.CrossWords.CrossWord[wordnum].word;
				UDM_STOPWORD	*sw;
				size_t		wlen=strlen(w);
				
				if(wlen>Indexer->Conf->WordParam.max_word_len ||
				   wlen<Indexer->Conf->WordParam.min_word_len ||
				   (sw=UdmStopListFind(&Indexer->Conf->StopWords,w)))
				{
					Doc.CrossWords.CrossWord[wordnum].weight=0;
				}	
			}
			UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
		}
	}
	
	/* Hand the document's links over and write them to the database */
	UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
	if(UDM_OK==(result=UdmDocStoreHrefs(Indexer,&Doc))){
		if(UDM_OK!=(result=UdmStoreHrefs(Indexer)))
			result=UDM_ERROR;
	}
	UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
	
	if(result!=UDM_OK){
		UdmDocFree(&Doc);
		return result;
	}
	
	/* Free unnecessary information */
	UdmHrefListFree(&Doc.Hrefs);
	UdmVarListFree(&Doc.RequestHeaders);
	UdmTextListFree(&Doc.TextList);
	UDM_FREE(Doc.Buf.buf);
	Doc.Buf.maxsize=0;
	
	UDM_GETLOCK(Indexer,UDM_LOCK_CONF);
	result=UdmURLAction(Indexer,&Doc,UDM_URL_ACTION_FLUSH,Indexer->Conf->db);
	UDM_RELEASELOCK(Indexer,UDM_LOCK_CONF);
	
	return(result);
}
